{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "%load_ext autoreload\n",
    "%autoreload 2\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys\n",
    "from darts import SeasonalityMode\n",
    "from darts.models import NaiveSeasonal, NaiveDrift\n",
    "from darts.models import Prophet, ExponentialSmoothing, AutoARIMA, Theta, FourTheta\n",
    "from darts.models import LinearRegressionModel\n",
    "from darts.utils.statistics import check_seasonality, remove_seasonality, extract_trend_and_seasonality\n",
    "\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "from tqdm import tqdm_notebook as tqdm\n",
    "\n",
    "import pickle"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "from M4_metrics import owa_m4, smape_m4, mase_m4"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "from darts.utils.timeseries_generation import constant_timeseries as ct"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "import logging\n",
    "import warnings\n",
    "warnings.filterwarnings('ignore')\n",
    "logging.disable(logging.CRITICAL)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "download = True\n",
    "preprocess = True"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Download and create TimeSeries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "M4-info already exists\n",
      "Yearly-train already exists\n",
      "Yearly-test already exists\n",
      "Quarterly-train already exists\n",
      "Quarterly-test already exists\n",
      "Monthly-train already exists\n",
      "Monthly-test already exists\n",
      "Weekly-train already exists\n",
      "Weekly-test already exists\n",
      "Daily-train already exists\n",
      "Daily-test already exists\n",
      "Hourly-train already exists\n",
      "Hourly-test already exists\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "5cc8b953ac2a47f6ade4a90f091ef37b",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "509d0310b9734210be674a6e78e6a42f",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(FloatProgress(value=0.0, max=23000.0), HTML(value='')))"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "if download:\n",
    "    %run -i \"download_data_M4.py\"\n",
    "if preprocess:\n",
    "    %run -i \"create_ts.py\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### dataset info"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# data_categories = ['Macro', 'Micro', 'Demographic', 'Industry', 'Finance', 'Other']\n",
    "data_freq = ['Yearly', 'Quarterly', 'Monthly', 'Weekly', 'Daily', 'Hourly']\n",
    "info_dataset = pd.read_csv('dataset/M4-info.csv', delimiter=',').set_index('M4id')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "info_dataset.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "info_dataset.filter(regex='W', axis=0).category.unique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "info_dataset.groupby('SP').count()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(info_dataset.groupby('SP').Frequency.unique())\n",
    "print(info_dataset.groupby('SP').Horizon.unique())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### evaluating methods "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "freq = 'Hourly'\n",
    "ts_train = pickle.load(open(\"dataset/train_\"+freq+\".pkl\", \"rb\"))\n",
    "ts_test = pickle.load(open(\"dataset/test_\"+freq+\".pkl\", \"rb\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "mase_all = []\n",
    "smape_all = []\n",
    "m = int(info_dataset.Frequency[freq[0]+'1'])\n",
    "for train, test in tqdm(zip(ts_train, ts_test)):\n",
    "    # remove seasonality\n",
    "    train_des=train\n",
    "    seasonOut = ct(length=len(test), freq=train.freq_str, start_ts=test.start_time())\n",
    "    season = ct(length=len(train), freq=train.freq_str, start_ts=train.start_time())\n",
    "    if m > 1:\n",
    "        if check_seasonality(train, m=m, max_lag=2*m):\n",
    "            _, season = extract_trend_and_seasonality(train, m, model=SeasonalityMode.MULTIPLICATIVE)\n",
    "            train_des = remove_seasonality(train, freq=m, model=SeasonalityMode.MULTIPLICATIVE)\n",
    "            seasonOut = season[-m:].shift(m)\n",
    "            seasonOut = seasonOut.append_values(seasonOut.values())[:len(test)]\n",
    "    # model selection\n",
    "    naiveSeason = NaiveSeasonal(K=m)\n",
    "    naive2 = NaiveSeasonal(K=1)\n",
    "    ses = ExponentialSmoothing(trend=None, seasonal=None)\n",
    "    holt = ExponentialSmoothing(seasonal=None, damped=False, trend='additive')\n",
    "    damp = ExponentialSmoothing(seasonal=None, damped=True, trend='additive')\n",
    "    fourtheta = FourTheta.select_best_model(train, thetas=[1,2,3], m=m)\n",
    "    # model fitting\n",
    "    naiveSeason.fit(train)\n",
    "    naive2.fit(train_des)\n",
    "    fourtheta.fit(train)\n",
    "    ses.fit(train_des)\n",
    "    holt.fit(train_des)\n",
    "    damp.fit(train_des)\n",
    "    # forecasting\n",
    "    forecast_naiveSeason = naiveSeason.predict(len(test))\n",
    "    forecast_naive2 = naive2.predict(len(test)) * seasonOut\n",
    "    forecast_fourtheta = fourtheta.predict(len(test))\n",
    "    forecast_ses = ses.predict(len(test))*seasonOut\n",
    "    forecast_holt = holt.predict(len(test))*seasonOut\n",
    "    forecast_damp = damp.predict(len(test))*seasonOut\n",
    "    # baseline constant weight ensembling\n",
    "    forecast_comb = ((forecast_ses + forecast_holt + forecast_damp) / 3)\n",
    "    \n",
    "    mase_all.append(np.vstack([\n",
    "                               mase_m4(train, test, forecast_naiveSeason, m=m),\n",
    "                               mase_m4(train, test, forecast_naive2, m=m),\n",
    "                               mase_m4(train, test, forecast_fourtheta, m=m),\n",
    "                               mase_m4(train, test, forecast_ses, m=m),\n",
    "                               mase_m4(train, test, forecast_holt, m=m),\n",
    "                               mase_m4(train, test, forecast_damp, m=m),\n",
    "                               mase_m4(train, test, forecast_comb, m=m),\n",
    "                              ]))\n",
    "    smape_all.append(np.vstack([\n",
    "                                smape_m4(test, forecast_naiveSeason),\n",
    "                                smape_m4(test, forecast_naive2),\n",
    "                                smape_m4(test, forecast_fourtheta),\n",
    "                                smape_m4(test, forecast_ses),\n",
    "                                smape_m4(test, forecast_holt),\n",
    "                                smape_m4(test, forecast_damp),\n",
    "                                smape_m4(test, forecast_comb),\n",
    "                               ]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(\"MASE; naiveSeason: {:.3f}, naive2: {:.3f}, 4Theta: {:.3f},\\n\"\n",
    "       \"SES: {:.3f}, Holt: {:.3f}, Damp: {:.3f}, Comb: {:.3f}\\n\".format(*tuple(np.stack(mase_all).mean(axis=(0,2)))))\n",
    "print(\"sMAPE; naiveSeason: {:.3f}, naive2: {:.3f}, 4Theta: {:.3f},\\n\"\n",
    "       \"SES: {:.3f}, Holt: {:.3f}, Damp: {:.3f}, Comb: {:.3f}\\n\".format(*tuple(np.stack(smape_all).mean(axis=(0,2)))))\n",
    "print(\"OWA; naiveSeason: {:.3f}, naive2: {:.3f}, 4Theta: {:.3f},\\n\"\n",
    "       \"SES: {:.3f}, Holt: {:.3f}, Damp: {:.3f}, Comb: {:.3f}\\n\".format(*tuple(owa_m4(freq, \n",
    "                                                                        np.stack(smape_all).mean(axis=(0,2)), \n",
    "                                                                        np.stack(mase_all).mean(axis=(0,2))))))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "train[-2*m:].plot(label='train')\n",
    "test.plot(label='test')\n",
    "forecast_naiveSeason.plot(label='naive seasonal')\n",
    "forecast_naive2.plot(label='naive2')\n",
    "plt.legend()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "plt.hist(np.nanmean(np.stack(mase_all), axis=(2,))[:,3], bins=100, label='4Theta')\n",
    "plt.hist(np.nanmean(np.stack(mase_all), axis=(2,))[:,0], bins=30, label='naiveSeason', alpha=0.7)\n",
    "plt.legend();"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Visualization"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "freq = 'Monthly'\n",
    "ts_train = pickle.load(open(\"dataset/train_\"+freq+\".pkl\", \"rb\"))\n",
    "ts_test = pickle.load(open(\"dataset/test_\"+freq+\".pkl\", \"rb\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "_id = 1\n",
    "train = ts_train[_id]\n",
    "test = ts_test[_id]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "m = int(info_dataset.Frequency[freq[0]+'1'])\n",
    "train_des=train\n",
    "seasonOut = ct(length=len(test), freq=train.freq_str, start_ts=test.start_time())\n",
    "season = ct(length=len(train), freq=train.freq_str, start_ts=train.start_time())\n",
    "if m > 1:\n",
    "    if check_seasonality(train, m=int(m), max_lag=2*m):\n",
    "        _, season = extract_trend_and_seasonality(train, m, model=SeasonalityMode.MULTIPLICATIVE)\n",
    "        train_des = remove_seasonality(train, freq=m, model=SeasonalityMode.MULTIPLICATIVE)\n",
    "        seasonOut = season[-m:].shift(m)\n",
    "        seasonOut = seasonOut.append_values(seasonOut.values())[:len(test)]\n",
    "    else:\n",
    "        m = 1\n",
    "# model selection\n",
    "naiveSeason = NaiveSeasonal(K=m)\n",
    "naive2 = NaiveSeasonal(K=1)\n",
    "prophet = Prophet(yearly_seasonality=True, changepoint_range=0.95)\n",
    "arima = AutoARIMA()\n",
    "fourtheta = FourTheta.select_best_model(train, thetas=[1, 2, 3], m=m)\n",
    "# model fitting\n",
    "naiveSeason.fit(train)\n",
    "naive2.fit(train_des)\n",
    "fourtheta.fit(train)\n",
    "prophet.fit(train)\n",
    "arima.fit(train)\n",
    "# forecasting\n",
    "forecast_naiveSeason = naiveSeason.predict(len(test))\n",
    "forecast_naive2 = naive2.predict(len(test)) * seasonOut\n",
    "forecast_fourtheta = fourtheta.predict(len(test))\n",
    "forecast_arima = arima.predict(len(test))\n",
    "forecast_prophet = prophet.predict(len(test))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "train[-m:].plot(label='train')\n",
    "test.plot(label='test', lw=3)\n",
    "forecast_naiveSeason.plot(label='naiveS')\n",
    "forecast_fourtheta.plot(label='4theta')\n",
    "forecast_naive2.plot(label='naive2')\n",
    "forecast_arima.plot(label='ARIMA')\n",
    "forecast_prophet.pd_series().plot(label='prophet')\n",
    "plt.legend()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### 4Theta"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "freq = 'Yearly'\n",
    "m = int(info_dataset.Frequency[freq[0]+'1'])\n",
    "plt.figure()\n",
    "ts_train = pickle.load(open(\"dataset/train_\"+freq+\".pkl\", \"rb\"))\n",
    "ts_test = pickle.load(open(\"dataset/test_\"+freq+\".pkl\", \"rb\"))\n",
    "_id = 1\n",
    "train = ts_train[_id]\n",
    "test = ts_test[_id]\n",
    "fourtheta = FourTheta.select_best_model(train, thetas=[2], m=m)\n",
    "theta = Theta(seasonality_period=m)\n",
    "print(fourtheta)\n",
    "\n",
    "fourtheta.fit(train)\n",
    "theta.fit(train)\n",
    "\n",
    "forecast_fourtheta = fourtheta.predict(len(test))\n",
    "forecast_theta = theta.predict(len(test))\n",
    "\n",
    "test.plot(label='test')\n",
    "forecast_fourtheta.plot(label='4theta')\n",
    "forecast_theta.plot(label='classic theta')\n",
    "plt.title(freq + ' frequency')\n",
    "print(\"Theta MASE: {:.3f}\".format(np.mean(mase_m4(train, test, forecast_theta))))\n",
    "print(\"4Theta MASE: {:.3f}\".format(np.mean(mase_m4(train, test, forecast_fourtheta))))\n",
    "plt.legend();"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "freq = 'Quarterly'\n",
    "m = int(info_dataset.Frequency[freq[0]+'1'])\n",
    "plt.figure()\n",
    "ts_train = pickle.load(open(\"dataset/train_\"+freq+\".pkl\", \"rb\"))\n",
    "ts_test = pickle.load(open(\"dataset/test_\"+freq+\".pkl\", \"rb\"))\n",
    "_id = 1\n",
    "train = ts_train[_id]\n",
    "test = ts_test[_id]\n",
    "fourtheta = FourTheta.select_best_model(train, thetas=[2], m=m)\n",
    "theta = Theta(seasonality_period=m)\n",
    "print(fourtheta)\n",
    "\n",
    "fourtheta.fit(train)\n",
    "theta.fit(train)\n",
    "\n",
    "forecast_fourtheta = fourtheta.predict(len(test))\n",
    "forecast_theta = theta.predict(len(test))\n",
    "\n",
    "test.plot(label='test')\n",
    "forecast_fourtheta.plot(label='4theta')\n",
    "forecast_theta.plot(label='classic theta')\n",
    "plt.title(freq + ' frequency')\n",
    "print(\"Theta MASE: {:.3f}\".format(np.mean(mase_m4(train, test, forecast_theta))))\n",
    "print(\"4Theta MASE: {:.3f}\".format(np.mean(mase_m4(train, test, forecast_fourtheta))))\n",
    "plt.legend();"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "freq = 'Monthly'\n",
    "m = int(info_dataset.Frequency[freq[0]+'1'])\n",
    "plt.figure()\n",
    "ts_train = pickle.load(open(\"dataset/train_\"+freq+\".pkl\", \"rb\"))\n",
    "ts_test = pickle.load(open(\"dataset/test_\"+freq+\".pkl\", \"rb\"))\n",
    "_id = 1\n",
    "train = ts_train[_id]\n",
    "test = ts_test[_id]\n",
    "fourtheta = FourTheta.select_best_model(train, thetas=[2], m=m)\n",
    "theta = Theta(seasonality_period=m)\n",
    "print(fourtheta)\n",
    "\n",
    "fourtheta.fit(train)\n",
    "theta.fit(train)\n",
    "\n",
    "forecast_fourtheta = fourtheta.predict(len(test))\n",
    "forecast_theta = theta.predict(len(test))\n",
    "\n",
    "test.plot(label='test')\n",
    "forecast_fourtheta.plot(label='4theta')\n",
    "forecast_theta.plot(label='classic theta')\n",
    "plt.title(freq + ' frequency')\n",
    "print(\"Theta MASE: {:.3f}\".format(np.mean(mase_m4(train, test, forecast_theta))))\n",
    "print(\"4Theta MASE: {:.3f}\".format(np.mean(mase_m4(train, test, forecast_fourtheta))))\n",
    "plt.legend();"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "freq = 'Weekly'\n",
    "m = int(info_dataset.Frequency[freq[0]+'1'])\n",
    "plt.figure()\n",
    "ts_train = pickle.load(open(\"dataset/train_\"+freq+\".pkl\", \"rb\"))\n",
    "ts_test = pickle.load(open(\"dataset/test_\"+freq+\".pkl\", \"rb\"))\n",
    "_id = 1\n",
    "train = ts_train[_id]\n",
    "test = ts_test[_id]\n",
    "fourtheta = FourTheta.select_best_model(train, thetas=[2], m=m)\n",
    "theta = Theta(seasonality_period=m)\n",
    "print(fourtheta)\n",
    "\n",
    "fourtheta.fit(train)\n",
    "theta.fit(train)\n",
    "\n",
    "forecast_fourtheta = fourtheta.predict(len(test))\n",
    "forecast_theta = theta.predict(len(test))\n",
    "\n",
    "test.plot(label='test')\n",
    "forecast_fourtheta.plot(label='4theta')\n",
    "forecast_theta.plot(label='classic theta')\n",
    "plt.title(freq + ' frequency')\n",
    "print(\"Theta MASE: {:.3f}\".format(np.mean(mase_m4(train, test, forecast_theta))))\n",
    "print(\"4Theta MASE: {:.3f}\".format(np.mean(mase_m4(train, test, forecast_fourtheta))))\n",
    "plt.legend();"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "freq = 'Daily'\n",
    "m = int(info_dataset.Frequency[freq[0]+'1'])\n",
    "plt.figure()\n",
    "ts_train = pickle.load(open(\"dataset/train_\"+freq+\".pkl\", \"rb\"))\n",
    "ts_test = pickle.load(open(\"dataset/test_\"+freq+\".pkl\", \"rb\"))\n",
    "_id = 1\n",
    "train = ts_train[_id]\n",
    "test = ts_test[_id]\n",
    "fourtheta = FourTheta.select_best_model(train, thetas=[2], m=m)\n",
    "theta = Theta(seasonality_period=m)\n",
    "print(fourtheta)\n",
    "\n",
    "fourtheta.fit(train)\n",
    "theta.fit(train)\n",
    "\n",
    "forecast_fourtheta = fourtheta.predict(len(test))\n",
    "forecast_theta = theta.predict(len(test))\n",
    "\n",
    "test.plot(label='test')\n",
    "forecast_fourtheta.plot(label='4theta')\n",
    "forecast_theta.plot(label='classic theta')\n",
    "plt.title(freq + ' frequency')\n",
    "print(\"Theta MASE: {:.3f}\".format(np.mean(mase_m4(train, test, forecast_theta))))\n",
    "print(\"4Theta MASE: {:.3f}\".format(np.mean(mase_m4(train, test, forecast_fourtheta))))\n",
    "plt.legend();"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "freq = 'Hourly'\n",
    "m = int(info_dataset.Frequency[freq[0]+'1'])\n",
    "plt.figure()\n",
    "ts_train = pickle.load(open(\"dataset/train_\"+freq+\".pkl\", \"rb\"))\n",
    "ts_test = pickle.load(open(\"dataset/test_\"+freq+\".pkl\", \"rb\"))\n",
    "_id = 0\n",
    "train = ts_train[_id]\n",
    "test = ts_test[_id]\n",
    "fourtheta = FourTheta.select_best_model(train, thetas=[2], m=m)\n",
    "theta = Theta(seasonality_period=m)\n",
    "print(fourtheta)\n",
    "\n",
    "fourtheta.fit(train)\n",
    "theta.fit(train)\n",
    "\n",
    "forecast_fourtheta = fourtheta.predict(len(test))\n",
    "forecast_theta = theta.predict(len(test))\n",
    "\n",
    "test.plot(label='test')\n",
    "forecast_fourtheta.plot(label='4theta')\n",
    "forecast_theta.plot(label='classic theta')\n",
    "plt.title(freq + ' frequency')\n",
    "print(\"Theta MASE: {:.3f}\".format(np.mean(mase_m4(train, test, forecast_theta))))\n",
    "print(\"4Theta MASE: {:.3f}\".format(np.mean(mase_m4(train, test, forecast_fourtheta))))\n",
    "plt.legend();"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Now, let's try to find a better theta, and a better frequency"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "freq = 'Weekly'\n",
    "plt.figure()\n",
    "ts_train = pickle.load(open(\"dataset/train_\"+freq+\".pkl\", \"rb\"))\n",
    "ts_test = pickle.load(open(\"dataset/test_\"+freq+\".pkl\", \"rb\"))\n",
    "_id = 2\n",
    "train = ts_train[_id][-100:]\n",
    "test = ts_test[_id]\n",
    "fourtheta = FourTheta.select_best_model(train, thetas=np.linspace(-1, 10, 90), m=None)\n",
    "theta = Theta(theta=(2 - fourtheta.theta))\n",
    "print(fourtheta)\n",
    "\n",
    "fourtheta.fit(train)\n",
    "theta.fit(train)\n",
    "\n",
    "forecast_fourtheta = fourtheta.predict(len(test))\n",
    "forecast_theta = theta.predict(len(test))\n",
    "\n",
    "test.plot(label='test')\n",
    "forecast_fourtheta.plot(label='4theta')\n",
    "forecast_theta.plot(label='classic theta')\n",
    "plt.title(freq + ' frequency')\n",
    "print(\"Theta MASE: {:.3f}\".format(np.mean(mase_m4(train, test, forecast_theta))))\n",
    "print(\"4Theta MASE: {:.3f}\".format(np.mean(mase_m4(train, test, forecast_fourtheta))))\n",
    "plt.legend();"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### run evaluation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "run_baseline = False\n",
    "if run_baseline:\n",
    "    %run -i \"evaluate_baselines.py\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "run_thetas = False\n",
    "if run_thetas:\n",
    "    %run -i \"evaluate_theta_methods.py\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "run_fft = False\n",
    "if run_fft:\n",
    "    %run -i \"evaluate_fft.py\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "run_arima = False\n",
    "if run_arima:\n",
    "    %run -i \"evaluate_arima.py\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "run_prophet = False\n",
    "if run_prophet:\n",
    "    %run -i \"evaluate_prophet.py\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Ensembling"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.linear_model import LassoCV"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from evaluate_ensembling import naive2_groe, groe_owa, DeseasonForecastingModel\n",
    "deseason_model = DeseasonForecastingModel"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "freq = 'Monthly'\n",
    "ts_train = pickle.load(open(\"dataset/train_\"+freq+\".pkl\", \"rb\"))\n",
    "ts_test = pickle.load(open(\"dataset/test_\"+freq+\".pkl\", \"rb\"))\n",
    "\n",
    "mase_all = []\n",
    "smape_all = []\n",
    "m = int(info_dataset.Frequency[freq[0]+'1'])\n",
    "for train, test in tqdm(zip(ts_train[:5], ts_test[:5])):\n",
    "    # remove seasonality\n",
    "    train_des=train\n",
    "    seasonOut = 1\n",
    "    season = ct(length=len(train), freq=train.freq_str, start_ts=train.start_time())\n",
    "    if m > 1:\n",
    "        if check_seasonality(train, m=int(m), max_lag=2*m):\n",
    "            pass\n",
    "            _, season = extract_trend_and_seasonality(train, m, model=SeasonalityMode.MULTIPLICATIVE)\n",
    "            train_des = remove_seasonality(train, freq=m, model=SeasonalityMode.MULTIPLICATIVE)\n",
    "            seasonOut = season[-m:].shift(m)\n",
    "            seasonOut = seasonOut.append_values(seasonOut.values())[:len(test)]\n",
    "    # model choice\n",
    "    naiveSeason = NaiveSeasonal(K=m)\n",
    "    naiveDrift = NaiveDrift()\n",
    "    naive2 = NaiveSeasonal(K=1)\n",
    "    ses = ExponentialSmoothing(trend=None, seasonal=None, seasonal_periods=m)\n",
    "    holt = ExponentialSmoothing(seasonal=None, damped=False, trend='additive', seasonal_periods=m)\n",
    "    damp = ExponentialSmoothing(seasonal=None, damped=True, trend='additive', seasonal_periods=m)\n",
    "    # prophet = Prophet(yearly_seasonality=True, weekly_seasonality=False, daily_seasonality=False)\n",
    "    \n",
    "    fourtheta = FourTheta.select_best_model(train, thetas=[1, 2, 3], m=m)\n",
    "    theta = Theta(theta=2, season_mode=SeasonalityMode.MULTIPLICATIVE, seasonality_period=m)\n",
    "    models_simple = [naiveSeason, theta, fourtheta]\n",
    "    models_des = [naive2, ses, holt, damp]\n",
    "\n",
    "    # linear regression (with constraints)\n",
    "    def train_pred(id_start=None, id_end=None):\n",
    "        for m in models_simple:\n",
    "            m.fit(train[id_start:id_end])\n",
    "        for m in models_des:\n",
    "            m.fit(train_des[id_start:id_end])\n",
    "        models_simple_predictions = [m.predict(len(test))\n",
    "                                     for m in models_simple]\n",
    "        id_fin = id_end+len(test)\n",
    "        if id_fin == 0:\n",
    "            id_fin = None\n",
    "        models_des_predictions = [m.predict(len(test)) * (seasonOut if id_end is None else season[id_end:id_fin])\n",
    "                                  for m in models_des]\n",
    "\n",
    "        model_predictions = models_simple_predictions + models_des_predictions\n",
    "        \n",
    "        return model_predictions\n",
    "    \n",
    "    val_predictions = train_pred(id_end=-len(test))\n",
    "\n",
    "    regr_model = LinearRegressionModel(train_n_points=len(test), \n",
    "                                         model=LassoCV(positive=True, fit_intercept=False, max_iter=5000))\n",
    "    target_val = train.slice_intersect(val_predictions[0])\n",
    "    regr_model.fit(val_predictions, target_val)\n",
    "    bktest_pred = val_predictions\n",
    "    \n",
    "    for mod in models_simple:\n",
    "        mod.fit(train)\n",
    "    for mod in models_des:\n",
    "        mod.fit(train_des)\n",
    "    \n",
    "    models_simple_predictions = [mod.predict(len(test))\n",
    "                                 for mod in models_simple]\n",
    "    models_des_predictions = [mod.predict(len(test)) * seasonOut\n",
    "                              for mod in models_des]\n",
    "\n",
    "    model_predictions = models_simple_predictions + models_des_predictions\n",
    "    \n",
    "    regr_model.model.coef_ = regr_model.model.coef_/np.sum(regr_model.model.coef_)\n",
    "    \n",
    "    ensemble_pred = regr_model.predict(model_predictions)\n",
    "    \n",
    "    # Mean ensembling\n",
    "    mean_pred = 0\n",
    "    for pred in model_predictions:\n",
    "        mean_pred = pred + mean_pred\n",
    "    mean_pred = mean_pred/len(model_predictions)\n",
    "    \n",
    "    ## GROE OWA\n",
    "    criterion = []\n",
    "    criterion.append(groe_owa(train, naiveSeason, max(5, len(train)-len(test)), int(np.floor(len(test)/6)), 6, m))\n",
    "    criterion.append(groe_owa(train, theta, max(5, len(train)-len(test)), int(np.floor(len(test)/6)), 6, m))\n",
    "    criterion.append(groe_owa(train, fourtheta, max(5, len(train)-len(test)), int(np.floor(len(test)/6)), 6, m))\n",
    "    criterion.append(groe_owa(train, deseason_model(NaiveSeasonal(K=1), m), max(5, len(train)-len(test)), int(np.floor(len(test)/6)), 6, m))\n",
    "    criterion.append(groe_owa(train, deseason_model(ses, m), max(5, len(train)-len(test)), int(np.floor(len(test)/6)), 6, m))\n",
    "    criterion.append(groe_owa(train, deseason_model(holt, m), max(5, len(train)-len(test)), int(np.floor(len(test)/6)), 6, m))\n",
    "    criterion.append(groe_owa(train, deseason_model(damp, m), max(5, len(train)-len(test)), int(np.floor(len(test)/6)), 6, m))\n",
    "    \n",
    "    Score = 1/np.array(criterion)\n",
    "    pesos = Score/Score.sum()\n",
    "    \n",
    "    groe_ensemble = 0\n",
    "    for prediction, weight in zip(model_predictions, pesos):\n",
    "        groe_ensemble = prediction * weight + groe_ensemble\n",
    "    \n",
    "    # BO3 ensembling\n",
    "    score = np.argsort(Score)[::-1][:3]\n",
    "    pesos2 = Score[score]/Score[score].sum()\n",
    "    \n",
    "    bo3_ensemble = 0\n",
    "    bo3_mean = 0\n",
    "    for i, model in enumerate(score):\n",
    "            bo3_ensemble = model_predictions[model]*pesos2[i] + bo3_ensemble\n",
    "            bo3_mean = model_predictions[model]/len(score) + bo3_mean\n",
    "    \n",
    "    mase_all.append(np.vstack([\n",
    "                               mase_m4(train, test, models_des_predictions[0], m=m),\n",
    "                               mase_m4(train, test, ensemble_pred, m=m),\n",
    "                               mase_m4(train, test, mean_pred, m=m),\n",
    "                               mase_m4(train, test, groe_ensemble, m=m),\n",
    "                               mase_m4(train, test, bo3_ensemble, m=m),\n",
    "                               mase_m4(train, test, bo3_mean, m=m),\n",
    "                              ]))\n",
    "    smape_all.append(np.vstack([\n",
    "                                smape_m4(test, models_des_predictions[0]),\n",
    "                                smape_m4(test, ensemble_pred),\n",
    "                                smape_m4(test, mean_pred),\n",
    "                                smape_m4(test, groe_ensemble),\n",
    "                                smape_m4(test, bo3_ensemble),\n",
    "                                smape_m4(test, bo3_mean),\n",
    "                               ]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(\"MASE; Naive2: {:.3f}, Linear Regression: {:.3f}, Mean ensembling: {:.3f}, GROE ensembling: {:.3f}, \"\n",
    "      \"BO3 ensembling: {:.3f}, BO3 Mean: {:.3f}\".format(*tuple(np.nanmean(np.stack(mase_all), axis=(0, 2)))))\n",
    "print(\"sMAPE; Naive2: {:.3f}, Linear Regression: {:.3f}, Mean ensembling: {:.3f}, GROE ensembling: {:.3f}, \"\n",
    "      \"BO3 ensembling: {:.3f}, BO3 Mean: {:.3f}\".format(*tuple(np.nanmean(np.stack(smape_all), axis=(0, 2)))))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(\"OWA: \", owa_m4(freq,\n",
    "                      np.nanmean(np.stack(mase_all), axis=(0, 2)),\n",
    "                      np.nanmean(np.stack(smape_all), axis=(0, 2))))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Visualization"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Linear Regression (Lasso)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "labels = ['naiveS', 'theta', '4theta', 'naive2','SES', 'holt', 'damped']\n",
    "target_val.plot(label=\"target\", lw=3)\n",
    "pred = regr_model.predict(bktest_pred)\n",
    "pred.plot(label=\"ensemble\", lw=3)\n",
    "for i, mod in enumerate(bktest_pred):\n",
    "    mod.plot(label=labels[i])\n",
    "plt.legend()\n",
    "plt.title(\"Validation MASE = {:.3f}\".format(np.mean(mase_m4(train[:-len(test)], train[-len(test):], pred, m=m))))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "regr_model.model.coef_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "labels = ['naiveS', 'theta', '4theta', 'naive2','SES', 'holt', 'damped']\n",
    "test.plot(label=\"target\", lw=3)\n",
    "ensemble_pred.plot(label=\"ensemble\", lw=3)\n",
    "for i,mod in enumerate(model_predictions):\n",
    "    mod.plot(label=labels[i])\n",
    "plt.legend()\n",
    "plt.title(\"Test MASE = {:.3f}\".format(np.mean(mase_m4(train, test, ensemble_pred, m=m))))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "GROE and other Ensembling"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "test.plot(label=\"target\", lw=3)\n",
    "mean_pred.plot(label='mean ensemble', lw=3)\n",
    "groe_ensemble.plot(label=\"groe ensemble\", lw=3)\n",
    "bo3_mean.plot(label=\"bo3 ensemble\")\n",
    "plt.legend()\n",
    "plt.title(\"MASE mean = {:.3f}, GROE = {:.3f}\".format(np.mean(mase_m4(train, test, mean_pred, m=m)), \n",
    "                                                     np.mean(mase_m4(train, test, groe_ensemble, m=m))))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# GROE weights\n",
    "pesos"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.5"
  },
  "pycharm": {
   "stem_cell": {
    "cell_type": "raw",
    "metadata": {
     "collapsed": false
    },
    "source": []
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
